library(phyloseq)
library(reshape2)
library(tidyverse)
library(vegan)
library(HTSSIP)
library(ape)
library(CoDaSeq)
library(philr)
library(ggtree)
library(cowplot)
library(ggplot2)
library(viridis)
library(treeio)
library(dplyr)
library(microbiome)
library(treeio)
#!/bin/bash
#SBATCH --partition=compute
#SBATCH --ntasks=1
#SBATCH --mem=20gb
#SBATCH --cpus-per-task=20
#SBATCH --time=04:35:00
module load anaconda/5.1
source activate qiime2-2018.8
echo merging and clustering
qiime dada2 denoise-paired \
--i-demultiplexed-seqs paired-end-demux.qza \
--p-n-threads 20 \
--p-trunc-q 2 \
--p-trim-left-f 19 \
--p-trim-left-r 20 \
--p-trunc-len-f 200 \
--p-trunc-len-r 160 \
--p-max-ee 5 \
--p-n-reads-learn 1000000 \
--p-chimera-method consensus \
--o-table asv_table.qza \
--o-representative-sequences rep-seqs.qza \
--o-denoising-stats ASVs3/stats-dada2.qza
echo classifying taxa
qiime feature-classifier classify-sklearn \
--i-classifier ../21Oct21/tagseq-qiime2-snakemake/silva_all.qza \
--i-reads rep-seqs.qza \
--o-classification asv_tax_sklearn.qza
qiime tools export --input-path asv_table.qza \
--output-path asv_table
biom convert -i asv_table/feature-table.biom -o asv_table/asv-table.tsv --to-tsv
qiime tools export --input-path asv_tax_sklearn.qza --output-path asv_tax_dir
qiime phylogeny align-to-tree-mafft-fasttree \
--i-sequences rep-seqs.qza \
--output-dir mafft-fasttree-output
setwd("/Users/oliviaahern/Documents/GitHub/SIP_CopyNumber")
{
# file contains info on MC2, Chemostat, measurements taken every 15 minutes
data=read.csv(file="datafiles/m2_do_02_edit.csv",header=T)
# subset to get the right time frame
read=subset(data, T<275)
time=read$T
ph= read$pH.M2
do=read$DO.M2..uM.
heado=read$O2.M2....
heado2=read$CO2.M2....
par(mar=c(2,7,1,5), mfrow=c(3,1),xpd=F)
plot(time, do,type='l',col='blue',xlab=" ", ylab = "", xlim=c(-450,240), xaxt='n')
rect(xleft=0, xright=240, ybottom=111, ytop=300, col= rgb(0.7,0.7,0.7,alpha=0.2),
border=NA)
mtext(side=2,"Dissolved Oxygen (uM)", col = 'blue', padj=-3)
axis(side=1, at=c(-450,-144, 0, 24, 48, 72, 120, 240))
par(new = TRUE)
plot(time, ph,col='red',type = "l", axes = FALSE, bty = "n", xlab = "", ylab = "")
mtext(side=4, "pH", col ='red', line=3)
axis(side=4, at = c(6.5, 6.7, 7, 7.2, 7.4))
mtext("a", adj=-.10,padj=-10,side=1,srt=-90,cex=1.5,font=1)
legend(25,7.4,legend=c("12C Glucose", "13C Glucose"), pch=22, pt.bg=c("white",'gray70'), bt='n')
plot(time, heado,type='l',col='navy',xlab=" ", ylab = "", xlim=c(-450,240), xaxt='n')
rect(xleft=0, xright=240, ybottom=0, ytop=300, col= rgb(0.7,0.7,0.7,alpha=0.2),border=NA)
mtext(side=2,"Oxygen (%)", col = 'navy', padj=-3)
axis(side=1, at=c(-450,-144, 0, 24, 48, 72, 120, 240))
par(new = TRUE)
plot(time, heado2,col='forestgreen',type = "l", axes = FALSE, bty = "n", xlab = "", ylab = "")
mtext(side=4, "CO2 (%)", col ='forestgreen', line=3)
axis(side=4, at = c(0,0.1,0.2,0.3,0.4))
mtext("b", adj=-.1,padj=-10,side=1,srt=-90,cex=1.5,font=1)
# file contains info on MC2, Chemostat, data was taken daily
read=read.csv(file="datafiles/states_gluc.csv",header=T)
glucose=read$Glucose_um
time_glucose=read$Time
par(mar=c(5,30,1,5))
plot(read$Time,read$Glucose_um,type='o',ylim=c(0,12),xlab= " ",
ylab = " ",col='gray50',bg='gray50',pch=21,cex.axis=1,cex.lab=1,cex=1.2,lwd=1.4,
yaxt='n', xaxt='n') #xaxt=c(-144,0,48,120,240))
rect(xleft=0, xright=240, ybottom=0, ytop=12, col= rgb(0.7,0.7,0.7,alpha=0.2),
border=NA)
mtext(side=2, "Glucose (um)", padj=-3, col="gray50")
axis(side=2,at=c(0,4,8,12), cex=1.3)
axis(side=1, at=c(-144, 0, 24, 48, 72, 120, 240))
par(new = TRUE)
plot(read$Time,read$Atom..13C,pch=23,
bg='black',type = "o", axes = FALSE, bty = "n", xlab = "", ylab = "", ylim=c(0,30), cex=1.2)
mtext(side=4, "Atomic 13C (%)", line=3)
axis(side=4, at=c(0, 10,20,30))
mtext(side=1, "Time (hours)", line=3)
# legend('topleft',legend=c("Glucose (uM)","Atomic 13C"), pch =c(21,23), pt.bg= c('gray70','black'), bty='n')
mtext("c", adj=-.15,padj=-9,side=1,srt=-90,cex=1.5,font=1)
}
data<-read.csv("/Users/oliviaahern/Documents/MBL_WHOI/Trophic_Cascades/Experiment3/sequencing_14Jan26/edit_qc/exp3_bd.csv",header=T,row.names=1)
data=data.frame(data)
par(mfrow=c(1,3),
mar=c(4,4,1,1))
plot(
subset(data, Culture== "Batch" & Substrate =="Single" & Isotope=="C12")$R_MAX,
subset(data, Culture== "Batch" & Substrate =="Single" & Isotope=="C12")$BD,
type='o', pch=21, col='gray70',
bg=subset(data, Culture== "Batch" & Substrate =="Single" & Isotope=="C12")$col,
ylab="Buoyant Density (g/mL)",
xlab="Ratio of Maximum Quantity",
ylim=c(1.71,1.84),
xaxt='n')
axis(1, at=c(0,0.5,1))
lines(
subset(data, Culture== "Batch" & Substrate =="Single" & Isotope=="C13")$R_MAX,
subset(data, Culture== "Batch" & Substrate =="Single" & Isotope=="C13")$BD,
type='o', pch=21, col='black',
bg=subset(data, Culture== "Batch" & Substrate =="Single" & Isotope=="C13")$col)
plot(
subset(data, Culture== "Batch" & Substrate =="Multi" & Isotope=="C12")$R_MAX,
subset(data, Culture== "Batch" & Substrate =="Multi" & Isotope=="C12")$BD,
type='o', pch=21, col='gray70',
bg=subset(data, Culture== "Batch" & Substrate =="Multi" & Isotope=="C12")$col,
ylab="",
xlab="Ratio of Maximum Quantity",
ylim=c(1.71,1.84),
xaxt='n')
axis(1, at=c(0,0.5,1))
lines(
subset(data, Culture== "Batch" & Substrate =="Multi" & Isotope=="C13")$R_MAX,
subset(data, Culture== "Batch" & Substrate =="Multi" & Isotope=="C13")$BD,
type='o', pch=21, col='black', bg=subset(data, Culture== "Batch" & Substrate =="Multi" & Isotope=="C13")$col)
plot(
subset(data, Culture== "Chemostat" & Substrate =="Multi" & Isotope=="C12")$R_MAX,
subset(data, Culture== "Chemostat" & Substrate =="Multi" & Isotope=="C12")$BD,
type='o', pch=21, col='gray70',
bg=subset(data, Culture== "Chemostat" & Substrate =="Multi" & Isotope=="C12")$col,
ylab="",xlab="Ratio of Maximum Quantity",
ylim=c(1.71,1.84),
xaxt='n')
axis(1, at=c(0,0.5,1))
lines(
subset(data, Culture== "Chemostat" & Substrate =="Multi" & Isotope=="C13")$R_MAX,
subset(data, Culture== "Chemostat" & Substrate =="Multi" & Isotope=="C13")$BD,
type='o', pch=21, col='black',
bg=subset(data, Culture== "Chemostat" & Substrate =="Multi" & Isotope=="C13")$col)
data<-read.csv("datafiles/exp3_bd.csv",header=T,row.names=1)
qPCR_data=data.frame(data)
desired_order=c("Single", "Multi")
qPCR_data$Substrate <- factor(qPCR_data$Substrate, levels = desired_order)
ggplot(data=qPCR_data, aes(x=BD, y=R_MAX, col=Isotope))+
geom_point() +
geom_line() +
theme_bw() +
facet_wrap(Culture ~ Substrate) +
coord_flip() +
scale_color_manual(values=c("gray","black")) +
xlab("Buoyant Density (g/mL)") +
ylab("Ratio of Maximum Quantity")
numbs=read.csv(file='datafiles/copy_number.csv',
header=T)
number=numbs$x
x<-read.csv(file='datafiles/asv-table.csv',
header=TRUE,row.names=1)
x2=sweep(x, 1, number, "/") # divide asvs by 16S gene copy number
x2=ceiling(x2) # round it
OTU = otu_table(x2, taxa_are_rows=T)
taxa<-read.csv(file='datafiles/taxonomy.csv',
header=TRUE,row.names=1)
t<-as.matrix(taxa)
tax2<-tax_table(t)
map<-import_qiime_sample_data("datafiles/sampling_batch_exp3_combined.txt")
tree=read.newick("datafiles/tree.nwk")
phyo1 = phyloseq(OTU, tax2,map,tree)
# 1238 ASVs
phyo1 = subset_taxa(phyo1, !Order=="Chloroplast")
# 1125, removed 113 that were chloroplasts
phyo1 = subset_taxa(phyo1, !Family=="Mitochondria")
# 1104, removed 21 mitochondria
phyo1=subset_taxa(phyo1, !Phylum =="Cyanobacteria")
# 1089, removed 15 cyanobacteria
phyo1=subset_taxa(phyo1, !Phylum =="")
# 1078, removed 19 no assignment to phyla level
phyo_frac=subset_samples(phyo1, Isotope !="NA")
phyo_frac <- prune_samples(sample_sums(phyo_frac) > 0, phyo_frac)
phyo_frac=subset_samples(phyo1, Isotope !="NA")
ASVs_to_keep <- taxa_sums(phyo_frac) > 0
phyo_frac <- prune_taxa(ASVs_to_keep, phyo_frac)
map<-read.csv("datafiles/exp3_qpcr_combined.csv",
header=T,row.names=1)
physeq_rep3_t = OTU_qPCR_trans(phyo_frac, map)
batch_glu=
subset_samples(physeq_rep3_t, Culture=="Batch" & Treatment=="Single")
otu=otu_table(batch_glu)
deco=decostand(otu, 'pa')
frac2=merge_phyloseq(deco, sample_data(batch_glu), phy_tree(batch_glu), tax_table(batch_glu))
## Subset by tfrac1## Subset by treatment
ps1 <- prune_samples(frac2@sam_data$Isotope == "C12", frac2)
ps1 <- filter_taxa(ps1, function(x) sum(x) > 1, prune = TRUE)
ps2 <- prune_samples(frac2@sam_data$Isotope == "C13", frac2)
ps2 <- filter_taxa(ps2, function(x) sum(x) > 1, prune = TRUE)
## Get vectors of numbered OTUs/ASVs
treatment1 <- rownames(otu_table(ps1))
treatment2 <- rownames(otu_table(ps2))
## Get the intersection
shared <- intersect(treatment1, treatment2) # 97 ASVs shared between the C12 and C13 treatment
frac1=batch_glu
## Subset phyloseq object to shared taxa
ps.s <- subset(otu_table(frac1), rownames(otu_table(frac1)) %in% shared)
merged=merge_phyloseq(ps.s, tax_table(frac1), sample_data(frac1), phy_tree(frac1)) # 53 taxa
merged
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 98 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 98 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 98 tips and 97 internal nodes ]
### run qSIP
atomX = qSIP_atom_excess(merged, control_expr='Isotope=="C12"', treatment_rep='Isotope')
df_atomX_boot = qSIP_bootstrap(atomX, n_boot=3,
isotope="13C")
df_atomX_boot2=na.omit(df_atomX_boot)
CI_threshold = 0.011
df_atomX_boot = df_atomX_boot %>%
mutate(Incorporator = A_CI_low > CI_threshold,
OTU = reorder(OTU, -A))
n_incorp = df_atomX_boot %>%
filter(Incorporator == TRUE) %>%
nrow
cat('Number of incorporators:', n_incorp, '\n')
## Number of incorporators: 84
## 83 incorp
df_dBD = delta_BD(merged, control_expr='Isotope=="C12"',
n=12)
stopifnot(nrow(df_atomX_boot) == nrow(df_dBD))
df_j = dplyr::inner_join(df_atomX_boot, df_dBD, c('OTU'='OTU'))
stopifnot(nrow(df_atomX_boot) == nrow(df_j))
df_j = df_j %>%
dplyr::mutate(OTU = reorder(OTU, -delta_BD))
batch_glu=df_j
batch_glui=
subset(batch_glu, Incorporator==TRUE)
batch_carb=
subset_samples(physeq_rep3_t, Culture=="Batch" & Treatment=="Multi")
otu=otu_table(batch_carb)
deco=decostand(otu, 'pa')
frac2=merge_phyloseq(deco, sample_data(batch_carb), phy_tree(batch_carb), tax_table(batch_carb))
## Subset by tfrac1## Subset by treatment
ps1 <- prune_samples(frac2@sam_data$Isotope == "C12", frac2)
ps1 <- filter_taxa(ps1, function(x) sum(x) > 1, prune = TRUE)
ps2 <- prune_samples(frac2@sam_data$Isotope == "C13", frac2)
ps2 <- filter_taxa(ps2, function(x) sum(x) > 1, prune = TRUE)
## Get vectors of numbered OTUs/ASVs
treatment1 <- rownames(otu_table(ps1))
treatment2 <- rownames(otu_table(ps2))
## Get the intersection
shared <- intersect(treatment1, treatment2) # 64 ASVs shared between the C12 and C13 treatment
frac1=batch_carb
## Subset phyloseq object to shared taxa
ps.s <- subset(otu_table(frac1), rownames(otu_table(frac1)) %in% shared)
merged=merge_phyloseq(ps.s, tax_table(frac1), sample_data(frac1), phy_tree(frac1)) # 53 taxa
merged
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 63 taxa and 12 samples ]
## sample_data() Sample Data: [ 12 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 63 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 63 tips and 62 internal nodes ]
### run qSIP
atomX = qSIP_atom_excess(merged, control_expr='Isotope=="C12"', treatment_rep='Isotope')
df_atomX_boot = qSIP_bootstrap(atomX, n_boot=1,isotope="13C")
df_atomX_boot2=na.omit(df_atomX_boot)
CI_threshold = 0.011
df_atomX_boot = df_atomX_boot %>%
mutate(Incorporator = A_CI_low > CI_threshold,
OTU = reorder(OTU, -A))
n_incorp = df_atomX_boot %>%
filter(Incorporator == TRUE) %>%
nrow
cat('Number of incorporators:', n_incorp, '\n')
## Number of incorporators: 60
## 61 incorp
df_dBD = delta_BD(merged, control_expr='Isotope=="C12"',
n=12)
stopifnot(nrow(df_atomX_boot) == nrow(df_dBD))
df_j = dplyr::inner_join(df_atomX_boot, df_dBD, c('OTU'='OTU'))
stopifnot(nrow(df_atomX_boot) == nrow(df_j))
df_j = df_j %>%
dplyr::mutate(OTU = reorder(OTU, -delta_BD))
batch_carb=df_j
batch_carbi=
subset(batch_carb, Incorporator==TRUE)
chemo=
subset_samples(physeq_rep3_t, Culture=="Chemostat" & Treatment=="Multi")
otu=otu_table(chemo)
deco=decostand(otu, 'pa')
frac2=merge_phyloseq(deco, sample_data(chemo), phy_tree(chemo), tax_table(chemo))
## Subset by tfrac1## Subset by treatment
ps1 <- prune_samples(frac2@sam_data$Isotope == "C12", frac2)
ps1 <- filter_taxa(ps1, function(x) sum(x) > 1, prune = TRUE)
ps2 <- prune_samples(frac2@sam_data$Isotope == "C13", frac2)
ps2 <- filter_taxa(ps2, function(x) sum(x) > 1, prune = TRUE)
## Get vectors of numbered OTUs/ASVs
treatment1 <- rownames(otu_table(ps1))
treatment2 <- rownames(otu_table(ps2))
## Get the intersection
shared <- intersect(treatment1, treatment2) # 78 ASVs shared between the C12 and C13 treatment
frac1=chemo
## Subset phyloseq object to shared taxa
ps.s <- subset(otu_table(frac1), rownames(otu_table(frac1)) %in% shared)
merged=merge_phyloseq(ps.s, tax_table(frac1), sample_data(frac1), phy_tree(frac1)) # 53 taxa
merged
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 77 taxa and 23 samples ]
## sample_data() Sample Data: [ 23 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 77 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 77 tips and 76 internal nodes ]
### run qSIP
atomX = qSIP_atom_excess(merged, control_expr='Isotope=="C12"', treatment_rep='Isotope')
df_atomX_boot = qSIP_bootstrap(atomX, n_boot=1,isotope="13C")
df_atomX_boot2=na.omit(df_atomX_boot)
CI_threshold = 0.011
df_atomX_boot = df_atomX_boot %>%
mutate(Incorporator = A_CI_low > CI_threshold,
OTU = reorder(OTU, -A))
n_incorp = df_atomX_boot %>%
filter(Incorporator == TRUE) %>%
nrow
cat('Number of incorporators:', n_incorp, '\n')
## Number of incorporators: 53
## 52 incorp
df_dBD = delta_BD(merged, control_expr='Isotope=="C12"',
n=12)
stopifnot(nrow(df_atomX_boot) == nrow(df_dBD))
df_j = dplyr::inner_join(df_atomX_boot, df_dBD, c('OTU'='OTU'))
stopifnot(nrow(df_atomX_boot) == nrow(df_j))
df_j = df_j %>%
dplyr::mutate(OTU = reorder(OTU, -delta_BD))
chemo_carb=df_j
chemo_carbi=
subset(chemo_carb, Incorporator==TRUE)
numbs=read.csv(file='datafiles/copy_number.csv',header=T)
batch_glu$Culture=c(rep("Batch",98))
batch_glu$Substrate=c(rep("Single",98))
batch_glu$copy_number=(subset(numbs, label %in% batch_glu$OTU))$x
batch_glu$id=c(rep("batch_glu",98))
batch_carb$Culture=c(rep("Batch",63))
batch_carb$Substrate=c(rep("Multi", 63))
batch_carb$copy_number=(subset(numbs, label %in% batch_carb$OTU))$x
batch_carb$id=c(rep("batch_carb",63))
chemo_carb$Culture=c(rep("Chemostat",77))
chemo_carb$Substrate=c(rep("Multi",77))
chemo_carb$copy_number=(subset(numbs, label %in% chemo_carb$OTU))$x
chemo_carb$id=c(rep("chemo_carb",77))
s1=bind_rows(batch_glu, batch_carb)
all=bind_rows(s1, chemo_carb)
taxa=tax_table(phyo1)
colnames(taxa)
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus" "Species"
## [8] "Strain"
colnames(taxa)=c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "OTU")
qsip_data=merge(all, taxa, by="OTU")
write.csv(qsip_data, 'datafiles/qsip_data_exp3.csv')
looking at all ASVs that were in the buoyant density gradients (must be present at in at least 2 fractions of both enriched and unenriched) and just the incorporators
par(mfrow=c(2,2))
boxplot(qsip_data$copy_number~qsip_data$id, xlab=" ", ylab="16S rRNA copy number", main = "16S copy number vs. treatments, all ASVs")
a=aov(qsip_data$copy_number~qsip_data$id)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## qsip_data$id 2 2.0 1.011 0.404 0.668
## Residuals 235 588.4 2.504
tuk=TukeyHSD(a)
plot(tuk,las=2)
incorp_only = subset(qsip_data, Incorporator == "TRUE")
boxplot(incorp_only$copy_number~incorp_only$id, xlab=" ", ylab="16S rRNA copy number", main = "16S copy number vs. treatments, only incorp")
a=aov(incorp_only$copy_number~incorp_only$id)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## incorp_only$id 2 1.4 0.6989 0.256 0.774
## Residuals 194 528.8 2.7259
tuk=TukeyHSD(a)
plot(tuk,las=2)
looking at all ASVs that were in the buoyant density gradients (must be present at in at least 2 fractions of both enriched and unenriched) and just the incorporators
par(mfrow=c(2,2))
boxplot(qsip_data$A~qsip_data$id, main="EAF among groups, all ASVs")
a=aov(qsip_data$A~qsip_data$id)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## qsip_data$id 2 2.825 1.4123 22.3 1.35e-09 ***
## Residuals 235 14.880 0.0633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tuk=TukeyHSD(a)
plot(tuk,las=2)
incorp_only = subset(qsip_data, Incorporator == "TRUE")
boxplot(incorp_only$A~incorp_only$id, main="EAF among groups, only incorp")
a=aov(incorp_only$A~incorp_only$id)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## incorp_only$id 2 1.199 0.5993 21.92 2.62e-09 ***
## Residuals 194 5.304 0.0273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tuk=TukeyHSD(a)
plot(tuk,las=2)
ag <- aggregate(A ~ id, incorp_only, function(x) c(mean = mean(x), sd = sd(x)))
par(mfrow=c(2,2),mar=c(8,5,5,1))
incorp_only = subset(qsip_data, Incorporator == "TRUE")
boxplot(incorp_only$A~incorp_only$Class,las=2)
a=aov(incorp_only$A~incorp_only$Class)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## incorp_only$Class 12 0.748 0.06231 1.992 0.0271 *
## Residuals 184 5.755 0.03128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tuk=TukeyHSD(a)
plot(tuk,las=2)
# bacteroidia had highest copy number
gamma = subset(incorp_only, Class == "Bacteroidia")
mean(gamma$A)
## [1] 0.3342792
sd(gamma$A)
## [1] 0.16447
range(gamma$A)
## [1] 0.04734661 0.79093808
boxplot(incorp_only$copy_number~incorp_only$Class,las=2)
a=aov(incorp_only$copy_number~incorp_only$Class)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## incorp_only$Class 12 83.2 6.936 2.855 0.00126 **
## Residuals 184 447.0 2.429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tuk=TukeyHSD(a)
plot(tuk,las=2)
range(incorp_only$copy_number)
## [1] 1 9
# gamma had the most
gamma = subset(incorp_only, Class == "Gammaproteobacteria")
mean(gamma$copy_number)
## [1] 3.382979
sd(gamma$copy_number)
## [1] 1.73898
par(mfrow=c(1,2))
boxplot(incorp_only$Wlight~incorp_only$id)
a=aov(incorp_only$Wlight~incorp_only$id)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## incorp_only$id 2 0.006092 0.0030461 295.3 <2e-16 ***
## Residuals 194 0.002001 0.0000103
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tuk=TukeyHSD(a)
plot(tuk,las=2)
tuk
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = incorp_only$Wlight ~ incorp_only$id)
##
## $`incorp_only$id`
## diff lwr upr p adj
## batch_glu-batch_carb 0.001235454 -4.677719e-05 0.002517686 0.0616602
## chemo_carb-batch_carb -0.011764357 -1.319432e-02 -0.010334389 0.0000000
## chemo_carb-batch_glu -0.012999811 -1.433052e-02 -0.011669102 0.0000000
ag <- aggregate(Wlight ~ id, incorp_only, function(x) c(mean = mean(x), sd = sd(x)))
ag
## id Wlight.mean Wlight.sd
## 1 batch_carb 1.773856480 0.002784779
## 2 batch_glu 1.775091934 0.002456411
## 3 chemo_carb 1.762092123 0.004478331
incorp_only = subset(qsip_data, Incorporator == "TRUE")
plot(incorp_only$Wlight, incorp_only$A)
ll=lm(incorp_only$A~incorp_only$Wlight)
summary(ll)
##
## Call:
## lm(formula = incorp_only$A ~ incorp_only$Wlight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31779 -0.12951 -0.03567 0.09235 0.49869
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.445 3.478 -3.578 0.000437 ***
## incorp_only$Wlight 7.176 1.964 3.654 0.000331 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1767 on 195 degrees of freedom
## Multiple R-squared: 0.06409, Adjusted R-squared: 0.05929
## F-statistic: 13.35 on 1 and 195 DF, p-value: 0.0003315
abline(ll)
plot(incorp_only$Wlight, incorp_only$copy_number)
ll=lm(incorp_only$copy_number~incorp_only$Wlight)
summary(ll)
##
## Call:
## lm(formula = incorp_only$copy_number ~ incorp_only$Wlight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7822 -0.7146 -0.5811 0.4135 6.4083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -16.82 32.44 -0.518 0.605
## incorp_only$Wlight 11.00 18.31 0.600 0.549
##
## Residual standard error: 1.647 on 195 degrees of freedom
## Multiple R-squared: 0.001846, Adjusted R-squared: -0.003273
## F-statistic: 0.3605 on 1 and 195 DF, p-value: 0.5489
abline(ll)
incorp_only = subset(qsip_data, Incorporator == "TRUE")
plot(incorp_only$Wlight, incorp_only$A)
par(mfrow=c(1,3),mar=c(5,5,1,1))
glu_only=subset(incorp_only, id=="batch_glu")
plot(glu_only$A, glu_only$Wlight, pch=21, cex=1.22, bg='black', xlab="AFE", ylab="Gene Copy Number")
ll=lm(glu_only$Wlight~glu_only$A)
summary(ll)
##
## Call:
## lm(formula = glu_only$Wlight ~ glu_only$A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0087071 -0.0010528 -0.0001573 0.0004101 0.0072168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7757854 0.0004909 3617.753 <2e-16 ***
## glu_only$A -0.0020478 0.0012198 -1.679 0.097 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00243 on 82 degrees of freedom
## Multiple R-squared: 0.03323, Adjusted R-squared: 0.02144
## F-statistic: 2.818 on 1 and 82 DF, p-value: 0.097
abline(ll)
batch_carbon=subset(incorp_only, id=="batch_carb")
plot(batch_carbon$A, batch_carbon$Wlight, pch=21, cex=1.22, bg='black', xlab="AFE", ylab="Gene Copy Number")
ll=lm(batch_carbon$Wlight~batch_carbon$A)
summary(ll)
##
## Call:
## lm(formula = batch_carbon$Wlight ~ batch_carbon$A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0065542 -0.0009677 0.0001807 0.0008690 0.0052887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7784295 0.0007308 2433.58 < 2e-16 ***
## batch_carbon$A -0.0170353 0.0025274 -6.74 7.99e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002103 on 58 degrees of freedom
## Multiple R-squared: 0.4392, Adjusted R-squared: 0.4296
## F-statistic: 45.43 on 1 and 58 DF, p-value: 7.994e-09
abline(ll)
chemo_carbon=subset(incorp_only, id=="chemo_carb")
plot(chemo_carbon$A, chemo_carbon$Wlight, pch=21, cex=1.22, bg='black', xlab="AFE", ylab="Gene Copy Number")
ll=lm(chemo_carbon$Wlight~chemo_carbon$A)
summary(ll)
##
## Call:
## lm(formula = chemo_carbon$Wlight ~ chemo_carbon$A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0140654 -0.0006758 0.0011694 0.0024761 0.0065914
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.762946 0.001023 1723.186 <2e-16 ***
## chemo_carbon$A -0.005825 0.005577 -1.044 0.301
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004474 on 51 degrees of freedom
## Multiple R-squared: 0.02094, Adjusted R-squared: 0.001744
## F-statistic: 1.091 on 1 and 51 DF, p-value: 0.3012
abline(ll)
#dim(incorp_only)
#incorp_only$Class
summary(as.factor(incorp_only$Class))
## Acidimicrobiia Actinobacteria Alphaproteobacteria Bacteroidia
## 4 11 48 54
## Chloroflexia Deltaproteobacteria Gammaproteobacteria OM190
## 1 3 47 1
## Planctomycetacia Rhodothermia Thermoleophilia vadinHA49
## 12 2 1 1
## Verrucomicrobiae
## 12
custom_colors_class=c("Actinobacteria"="#ff6b92",
"Alphaproteobacteria"="#aa003a",
"Bacteroidia"="#792e00",
"Cyanobacteriia"="#ffae55",
"Deltaproteobacteria"="#63af33",
"Gammaproteobacteria" = "#3eeb93",
"OM190" = "#0060ad",
"Planctomycetes"= "#9281fd",
"Rhodothermia" = "#3f0e50",
"vadinHA49" = "#e747a0",
"Verrucomicrobiae" = "#ff6eb1")
desired_order=c("Single", "Multi")
incorp_only$Substrate <- factor(incorp_only$Substrate, levels = desired_order)
ggplot(incorp_only, aes(x=A, y=Wlight, fill=Class, color=Class)) +
geom_point(size=as.numeric(incorp_only$copy_number)) +
#geom_point(size=3) +
facet_wrap(Culture ~ Substrate) +
theme_bw() +
theme(legend.position='bottom') +
scale_fill_manual(values=custom_colors_class) +
scale_color_manual(values=custom_colors_class) +
xlab("Excess Atomic Fraction") +
ylab(expression(" "*C^12*" Control Buoyant Density (g/mL)"))
formula = incorp_only$copy_number ~ incorp_only$A * incorp_only$Culture * incorp_only$Substrate
ggplot(incorp_only, aes(x=A, y=copy_number)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(Culture ~ Substrate) +
theme_bw() +
xlab("Excess Atomic Fraction") +
ylab("16S Gene Copy Number") +
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
#tat_poly_line(formula = formula) +
# stat_poly_eq(formula = formula)
par(mfrow=c(1,3))
glu_only=subset(incorp_only, id=="batch_glu")
plot(glu_only$A, glu_only$copy_number, pch=21, cex=1.22, bg='black', xlab="AFE", ylab="Gene Copy Number")
ll=lm(glu_only$copy_number~glu_only$A)
summary(ll)
##
## Call:
## lm(formula = glu_only$copy_number ~ glu_only$A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3094 -1.1104 -0.3466 0.7604 4.9769
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.3970 0.3158 4.424 2.96e-05 ***
## glu_only$A 3.9952 0.7848 5.091 2.23e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.563 on 82 degrees of freedom
## Multiple R-squared: 0.2401, Adjusted R-squared: 0.2309
## F-statistic: 25.92 on 1 and 82 DF, p-value: 2.23e-06
abline(ll)
text(0.2, 8, "R2 = 0.25
F = 28.54
p = 8.25e-7")
batch_carbon=subset(incorp_only, id=="batch_carb")
plot(batch_carbon$A, batch_carbon$copy_number, pch=21, cex=1.22, bg='black', xlab="AFE", ylab="Gene Copy Number")
ll=lm(batch_carbon$copy_number~batch_carbon$A)
summary(ll)
##
## Call:
## lm(formula = batch_carbon$copy_number ~ batch_carbon$A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6989 -1.0372 -0.3140 0.4577 6.5021
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.640 0.529 3.100 0.00298 **
## batch_carbon$A 3.700 1.830 2.022 0.04778 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.523 on 58 degrees of freedom
## Multiple R-squared: 0.06586, Adjusted R-squared: 0.04975
## F-statistic: 4.089 on 1 and 58 DF, p-value: 0.04778
abline(ll)
text(0.2, 8, "R2 = 0.03
F = 2.10
p = 0.15")
chemo_carbon=subset(incorp_only, id=="chemo_carb")
plot(chemo_carbon$A, chemo_carbon$copy_number, pch=21, cex=1.22, bg='black', xlab="AFE", ylab="Gene Copy Number")
ll=lm(chemo_carbon$copy_number~chemo_carbon$A)
summary(ll)
##
## Call:
## lm(formula = chemo_carbon$copy_number ~ chemo_carbon$A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0684 -0.6138 -0.4826 0.5223 6.4817
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4076 0.3515 6.849 9.43e-09 ***
## chemo_carbon$A 0.9518 1.9163 0.497 0.622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.537 on 51 degrees of freedom
## Multiple R-squared: 0.004814, Adjusted R-squared: -0.0147
## F-statistic: 0.2467 on 1 and 51 DF, p-value: 0.6215
abline(ll)
These are to see if there are differences in commuinty between the batch and chemostat because there was different innoculum and different sequencing centers
GP <- transform_sample_counts(phyo_frac, function(x) x+1)
phy_tree(GP) <- makeNodeLabel(phy_tree(GP), method="number", prefix='n')
name.balance(phy_tree(GP), tax_table(GP), 'n1')
## [1] "Class_Actinobacteria/Domain_Bacteria"
otu.table <- t(otu_table(GP))
treefr <- phy_tree(GP)
metadata <- sample_data(GP)
tax <- tax_table(GP)
gp.philr <- philr(otu.table, treefr,
part.weights='enorm.x.gm.counts',
ilr.weights='blw.sqrt')
gp.dist <- dist(gp.philr, method="euclidean")
gp.pcoa <- ordinate(GP, 'PCoA', distance=gp.dist)
{par(mar=c(5,5,1,7))
plot(gp.pcoa$vectors[,1],gp.pcoa$vectors[,2],
pch=21,
bg=as.factor(sample_data(GP)$Culture),
xlab = "PCoA1 85.51%", ylab= "PCoA2 4.86%",xpd=F)
ordiellipse(gp.pcoa$vectors,
group=as.factor(sample_data(GP)$Culture),
label=T,xpd=F,
kind ='sd',conf=0.8)
}
adonis2(gp.dist ~ as.factor(sample_data(GP)$Culture) +as.factor(sample_data(GP)$Substrate))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = gp.dist ~ as.factor(sample_data(GP)$Culture) + as.factor(sample_data(GP)$Substrate))
## Df SumOfSqs R2 F Pr(>F)
## as.factor(sample_data(GP)$Culture) 1 1178.81 0.81320 216.9951 0.001 ***
## as.factor(sample_data(GP)$Substrate) 3 26.33 0.01816 1.6157 0.164
## Residual 45 244.46 0.16864
## Total 49 1449.60 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# phyo_frac2=subset_taxa(phyo_frac, Strain %in% incorp_only$OTU)
# for some reason the above command doesn't work in my markdown so I just saved it
phyo_frac2=readRDS('datafiles/phyo_frac2')
GP <- transform_sample_counts(phyo_frac2, function(x) x+1)
phy_tree(GP) <- makeNodeLabel(phy_tree(GP), method="number", prefix='n')
name.balance(phy_tree(GP), tax_table(GP), 'n1')
## [1] "Class_Actinobacteria/Domain_Bacteria"
otu.table <- t(otu_table(GP))
treefr <- phy_tree(GP)
metadata <- sample_data(GP)
tax <- tax_table(GP)
gp.philr <- philr(otu.table, treefr,
part.weights='enorm.x.gm.counts',
ilr.weights='blw.sqrt')
gp.dist <- dist(gp.philr, method="euclidean")
gp.pcoa <- ordinate(GP, 'PCoA', distance=gp.dist)
{par(mar=c(5,5,1,7))
plot(gp.pcoa$vectors[,1],gp.pcoa$vectors[,2],
pch=21,
bg=as.factor(sample_data(GP)$Culture),
xlab = "PCoA1 89.28 %", ylab= "PCoA2 3.31 %",xpd=F) # from gp.pcoa$values$Relative_eig
ordiellipse(gp.pcoa$vectors,
group=as.factor(sample_data(GP)$Culture),
label=T,xpd=F,
kind ='sd',conf=0.8)
par(xpd=T)
}
adonis2(gp.dist ~ as.factor(sample_data(GP)$Culture) +as.factor(sample_data(GP)$Treatment), by='margin')
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = gp.dist ~ as.factor(sample_data(GP)$Culture) + as.factor(sample_data(GP)$Treatment), by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## as.factor(sample_data(GP)$Culture) 1 1324.73 0.48942 152.408 0.001 ***
## as.factor(sample_data(GP)$Treatment) 1 25.98 0.00960 2.989 0.071 .
## Residual 47 408.52 0.15093
## Total 49 2706.75 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1